async def cascaded_classification(
    message: str,
    confidence_threshold: float = 0.85
) -> dict:
    """Classify the sentiment of *message* with a two-tier model cascade.

    The fast model (Haiku) is always asked first. If the confidence parsed
    from its reply reaches ``confidence_threshold`` its answer is returned
    directly; otherwise the same message is escalated to the stronger
    model (Sonnet).

    Args:
        message: Text whose sentiment should be classified.
        confidence_threshold: Minimum Haiku confidence required to accept
            its answer without escalating.

    Returns:
        A dict with three keys:
            ``"sentiment"``: whatever ``extract_sentiment`` parsed from the
                reply of the model that produced the final answer.
            ``"model"``: ``"haiku"`` or ``"sonnet"``.
            ``"cost"``: approximate dollar cost of the whole request. On
                the escalated path this includes the Haiku call that was
                already made.
    """
    # NOTE(review): `message` is interpolated verbatim into both prompts.
    # If it can come from untrusted users, consider delimiting or escaping it.

    # Step 1: ask the fast model for a label plus a self-reported confidence.
    haiku_prompt = (
        "\n"
        f"Classify sentiment: {message}\n"
        "Output: positive|neutral|negative\n"
        "Confidence: [0.0-1.0]\n"
    )
    haiku_response = await claude_haiku.generate(haiku_prompt)

    sentiment = extract_sentiment(haiku_response)
    confidence = extract_confidence(haiku_response)

    # Step 2: accept the cheap answer only when a confidence was actually
    # parsed and it clears the threshold. A missing confidence (None) is
    # treated as "uncertain" and escalates instead of raising TypeError on
    # the comparison.
    # NOTE(review): assumes extract_confidence returns a float or None for
    # an unparsable reply -- confirm against its definition.
    if confidence is not None and confidence >= confidence_threshold:
        return {
            "sentiment": sentiment,
            "model": "haiku",
            "cost": 0.0003,  # Approximate: one Haiku call
        }

    # Step 3: escalate to the stronger model. The prompt repeats the same
    # output-format line as the Haiku prompt so that the shared
    # extract_sentiment parser sees a label in the format it is given in
    # step 1, rather than only free-form analysis.
    sonnet_prompt = (
        "\n"
        f"Classify sentiment: {message}\n"
        "The fast model was uncertain. Please provide a careful analysis.\n"
        "Output: positive|neutral|negative\n"
    )
    sonnet_response = await claude_sonnet.generate(sonnet_prompt)
    sentiment = extract_sentiment(sonnet_response)

    return {
        "sentiment": sentiment,
        "model": "sonnet",
        # Approximate: the Haiku call above (0.0003) has already been paid
        # for, so an escalated request costs both calls (0.0003 + 0.005).
        "cost": 0.0053,
    }
# Results from production:
# - 70% handled by Haiku
# - 30% escalated to Sonnet
# - Average cost: $0.0003 + (0.3 * $0.005) = $0.0018 (every request pays for Haiku; the 30% that escalate also pay for Sonnet)
# - vs. Sonnet only: $0.005
# - Savings: 64%